# install.packages(c("tidyverse", "xtable", "memisc", "effects", "stargazer", "lme4"))

library(xtable)
library(ggplot2)
library(dplyr)
library(readr)
library(haven)
library(scales)
library(memisc)
library(effects)
library(lubridate)
library(stargazer)
library(lme4)

# define theme
theme_pew <- function(base_size = 12, base_family = "") {
	# Starts with theme_grey and then modify some parts
	theme_grey(base_size = base_size, base_family = base_family) %+replace%
		theme(
			axis.line = element_line(colour = "black"),
			axis.text         = element_text(size = rel(0.8)),
			axis.ticks        = element_line(colour = "black"),
			legend.key        = element_rect(colour = "grey80"),
			panel.background = element_blank(),
			panel.border      = element_rect(fill = NA, colour = "grey50"),
			panel.grid.major = element_blank(),
			panel.grid.minor = element_blank(),
			axis.line.x = element_line(color="black", size = .3),
			axis.line.y = element_line(color="black", size = .3),
			strip.background  = element_rect(fill = "grey80", colour = "grey50", size = 0.2)
		)
}

# function to output to latex
mt2tex <- function(out){
	latexout <- toLatex(out)
	latexout <- gsub("&\\s*&", "&", latexout)
	latexout <- gsub("&", " & ", latexout)
	latexout <- gsub("_", "\\\\_", latexout)
	latexout <- gsub("\\}cD\\{", "\\}D\\{", latexout)
	latexout
}

cbColor = c("#E69F00", "#009E73", "#0072B2")
cbColorFull <- c("#E69F00", "#009E73", "#0072B2", "#D55E00", "#CC79A7")

setwd("~/Dropbox/projects/aggregtor")

##############################################
### Web traffic measured via Alexa instead ###
##############################################
setwd("~/workspace/aggregator/")
dataalexa <- read.csv("data/alexa.csv",header=T,as.is=T)
dataalexa$Date <- as.Date(dataalexa$Date, "%m/%d/%y")

library(reshape)
dataalexa <- melt(dataalexa, id=c("Date"))
names(dataalexa) <- c("Date","Source","Reach")

dataalexa <- dataalexa[dataalexa$Date > as.Date("2016/08/01"),]
dataalexa <- dataalexa[dataalexa$Date < as.Date("2016/11/07"),]

# levels(dataalexa$Source)[1] <- "top forecaster"

p2 <- ggplot(data=dataalexa, aes(x=Date, y=Reach, color=Source)) +
	scale_color_manual(values=c("#F8766D","#619CFF","#00BA38"))  +
	geom_smooth(se=F, span=1)  +
	geom_point(alpha=.25) +
	theme_pew() + 
	xlab("") +
	ggtitle("Web traffic: Forecasters\n and mainstream media")+
	theme(axis.title.x = element_text(size=7)) +
	theme(axis.title.y = element_text(size=10)) +
	theme(axis.text.y = element_text(size=6)) +
	theme(panel.background = element_blank(),
		  legend.text=element_text(size=5),
		  legend.position = "bottom",
		  legend.title=element_blank(),
		  legend.margin=margin(t = -.5, unit='cm')) +
	coord_cartesian(ylim = c(0, .012))

p2
ggsave("figures/alexa_web_traffic.pdf", plot =p2, width = 3, height = 3)

#############################
## ANES Win by quite a bit ##
#############################

## FOR MC BLOGPOST
load("data/anescdf16.RData")
table(anescdf16$closerace, anescdf16$year)
anescdf16$closerace[anescdf16$closerace=='DK'] <- NA

table(anescdf16$mode)

anescdf16$party <- factor(anescdf16$party, 
						  levels = c('Strong Dem','Dem','Lean Dem','Ind','Lean Rep','Rep','Strong Rep'))
anescdf16$pid3 <- anescdf16$party
levels(anescdf16$pid3) <- c("Dem", "Dem", "Dem", "Ind/Oth", "Rep", "Rep", "Rep")
# levels(anescdf16$pid3) <- c("Dem", "Dem", "Ind/Oth", "Ind/Oth", "Ind/Oth", "Rep", "Rep")

plotdat <- anescdf16 %>% group_by(year, pid3) %>% 
	summarise(m = weighted.mean(closerace == "Quite a bit", 
								na.rm=T, w = weight))
plotdat <- na.omit(plotdat)

polcol3 <- c("#3929AA","#AAAAAA","#AA3929")

plotdat <- plotdat[as.numeric(as.character(plotdat$year)) >= 2000,]

ggplot(plotdat, 
	   aes(x=year, y=m, group = pid3, color = pid3)) + 
	geom_line(size=1) +
	geom_point() +
	ylab("Percent expecting leading candidate\nwill 'win by quite a bit'") +  
	xlab("Year") +
	ggtitle("") + 
	annotate("segment", x = 2012, xend = 2012, y = .3, yend = .21, colour = polcol3[1]) +
	annotate("segment", x = 2008, xend = 2008, y = .35, yend = .24, colour = polcol3[2]) +
	annotate("segment", x = 2004, xend = 2004, y = .3, yend = .22, colour = polcol3[3]) +
	geom_label(aes(x = 2012, y = .3, label = "Democrats"), colour = polcol3[1], fill="white",label.size=NA) +
	geom_label(aes( x = 2008, y = .35, label = "Independents/\nOthers"), colour = polcol3[2], fill="white",label.size=NA) +
	geom_label(aes(x = 2004, y = .3, label = "Republicans"), colour = polcol3[3], fill="white",label.size=NA) +
	theme_pew() +
	scale_colour_manual(values=polcol3)+
	scale_fill_manual(values=polcol3)+
	scale_y_continuous(labels = scales::percent, limits = c(0, .4)) +
	scale_x_continuous(breaks = seq(2000, 2016, 4), limits = c(2000, 2016)) + theme(legend.position="none")
ggsave(filename="figures/anes_turnout_closerace_MC.pdf", width=5,height=3)

with(anescdf16[anescdf16$year==2016,], table(pid3))

summary( lm(voted ~ closerace * I(year==2016)* pid3  + voted_prev, data = anescdf16, weights = weight) )

plotdat <- anescdf16 %>% group_by(year, pid3, closerace) %>% 
	summarise(m = weighted.mean(voted, na.rm=T, w = weight))
plotdat <- na.omit(plotdat)

polcol3 <- c("#3929AA","#AAAAAA","#AA3929")

ggplot(plotdat[as.numeric(as.character(plotdat$year)) >= 2000,], 
	   aes(x=year, y=m, group = closerace, color = closerace)) + 
	geom_line() +
	geom_point() +
	ylab("") +  
	xlab("") +
	# annotate("text", x = 2011, y = .27, label = "Democrats", size = 4, col = polcol3[1]) +
	# annotate("text", x = 2009, y = .18, label = "Indep./Other", size = 4, col = polcol3[2]) +
	# annotate("text", x = 2008, y = .10, label = "Republicans", size = 4, col = polcol3[3]) +
	ggtitle("Percent saying they voted for president") + 
	theme_pew() +
	facet_wrap(~pid3, ncol = 1, scales = "free_y") +
	scale_colour_manual(values=polcol3)+
	scale_fill_manual(values=polcol3)+
	# scale_y_continuous(labels = scales::percent, limits = c(0, .4)) +
	theme(legend.position="none", 
		  panel.border = element_blank())
ggsave(filename="figures/anes_turnout_closerace_party.pdf", width=5,height=4)
ggsave(filename="figures/anes_turnout_closerace_party.png", width=5,height=2.5)









# plot ANES percent of public saying "will win quite a bit"

winners <-read.csv("data/electionwinners.csv")

# process percent victory and margin numbers: 
winners$EC_pct <- as.numeric(gsub(pattern = "%", replacement = "", winners$EC_Percent))/100
winners$EC_pct_margin <- winners$EC_pct - .5
winners$EC_pct_abs_margin <- abs(winners$EC_pct_margin)

winners$PV_pct <- as.numeric(gsub(pattern = "%", replacement = "", winners$PV_Percent))/100
winners$PV_pct_margin <- winners$PV_pct - .5
winners$PV_pct_abs_margin <- abs(winners$PV_pct_margin)

# join to ANES data by year
anescdf16$year <- factor(anescdf16$year)
anescdf16 <- merge(anescdf16, winners,by="year")

# model association between turnout and perceptions that race is close: 
m1 <- lm(voted ~ closerace, data = anescdf16, weights = weight)
m2 <- lm(voted ~ closerace + voted_prev + party, data = anescdf16, weights = weight)
m3 <- lm(voted ~ closerace + voted_prev + party  + EC_pct_abs_margin + PV_pct_abs_margin, data = anescdf16, weights = weight)
m4 <- lm(voted ~ closerace + voted_prev + party + year, data = anescdf16, weights = weight)
m5 <- lm(voted ~ closerace + voted_prev + pid3 + year, data = anescdf16, weights = weight)


# Comment out type = "text" for latex output:
stargazer(m1, m2, m3, m4,
		  align=TRUE, omit.stat=c("LL","ser","f"), no.space=TRUE,
		  type = "text",
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("'Quite a bit'", 
		  					 "Prior turnout",
		  					 "Independent",
		  					 "Lean dem",
		  					 "Lean rep",
		  					 "Rep",
		  					 "Strong dem",
		  					 "Strong rep",
		  					 "Electoral college margin",
		  					 "Popular vote margin",
		  					 column.labels=c("", "","", ""))
)


# Check that results replicate using an MLM: 
# library(lme4)
# m <- lmer(voted ~ closerace + voted_prev + party + (EC_pct_abs_margin + PV_pct_abs_margin | year), data = anescdf16, weights = weight)
# summary(m)
# Results nearly identical 

# produce marginal effects plot for model 3: 
Eff <- Effect(c("closerace"), m3,
			  xlevels=list(closerace=c(0, 1)))

preds <- as.data.frame(Eff)

levels(preds$closerace)[levels(preds$closerace)=="Quite a bit"] <- "Leading candidate will\nwin by 'quite a bit'"
levels(preds$closerace)[levels(preds$closerace)=="Close Race"] <- "A close race"

ggplot(preds, aes(x=closerace, y=fit,
				  ymin=lower, ymax=upper,color=cbColor[2],fill=cbColor[2])) + 
	geom_point() +
	geom_errorbar(width = .01) +
	xlab("Perceived state\nof the race") +
	ylab("Proportion of respondents who vote conditioning\non election year, prior turnout, and pid\n(95% Confidence Intervals)") + 
	coord_flip() + 
	ggtitle("")+ 
	theme_pew() +
	scale_colour_manual(values=cbColor[3])+
	scale_fill_manual(values=cbColor[3])+ 
	scale_y_continuous(breaks=seq(.71,.77,.01),limits = c(.71, .77)) +
	theme(legend.position="none")
ggsave(filename="figures/anes_turnout_closerace.pdf", width=6,height=2)
ggsave(filename="figures/anes_turnout_closerace.png", width=5,height=2.5)



# Look at proportion "quite a bit" by party and year

# function for weighted CIs
ci <- function(x, w) {
	xm <- weighted.mean(x, w=w, na.rm = T)
	wv <- Hmisc::wtd.var(x, w, na.rm = T)
	1.96 * sqrt(wv/sum(na.omit(w)))
}

plotdat <- anescdf16 %>% group_by(year, party) %>% 
	summarise(m = weighted.mean(closerace == "Quite a bit", 
								na.rm=T, w = weight),
			  ci = ci(x=closerace=='Quite a bit', w = weight))
plotdat$party <- factor(plotdat$party, 
						levels = c('Strong Dem','Dem','Lean Dem','Ind','Lean Rep','Rep','Strong Rep'))
plotdat <- na.omit(plotdat)


ggplot(plotdat[as.numeric(as.character(plotdat$year)) >= 2004,], aes(x=party, y=m,
																	 ymin=m-ci, ymax=m+ci)) + 
	geom_point() +
	geom_errorbar(width = .01) +
	coord_flip() + 
	facet_wrap(~year, ncol=1) +
	ylab("Proportion respondents saying leading candidate\nwill win by 'quite a bit'") + 
	xlab("Party identification") +
	ggtitle("")+ 
	theme_pew() +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor)+ 
	theme(legend.position="none")
ggsave(filename="figures/anes_party_closerace.pdf", width=5,height=7)
ggsave(filename="figures/anes_party_closerace.png", width=5,height=7)


# Now look at proportion "Win by quite a bit" by year, plot 
# along side electoral college and popular vote margin
# for president. 


plotdat <- anescdf16 %>% group_by(year) %>% summarise(
	prop_close = weighted.mean(closerace == "Close Race", 
							   na.rm=T, w = weight),
	prop_wbqab = weighted.mean(closerace == "Quite a bit", 
							   na.rm=T, w = weight)
)
plotdat$ymd <- lubridate::ymd(paste0(plotdat$year, "/01/01"))

# Join back to winners data: 
plotdat <- merge(plotdat, winners,by="year")
plotdat <- na.omit(plotdat)

ggplot(plotdat, aes(ymd, prop_wbqab)) +
	geom_line(aes(ymd, PV_pct-.5), color = cbColor[3]) +
	geom_point(aes(ymd, PV_pct-.5), color = cbColor[3]) +
	geom_line(aes(ymd, EC_pct-.5), color = cbColor[2]) +
	geom_point(aes(ymd, EC_pct-.5), color = cbColor[2]) +  
	geom_line(color = cbColor[1]) +
	geom_point(color = cbColor[1]) +
	scale_y_continuous(labels = scales::percent) +#, limits = c(0, .9)) +
	scale_x_date(date_breaks = "4 years", date_labels = "%Y") +
	ylab("") + xlab("year") +
	theme_pew() + theme(legend.position="none") +
	annotate("text", x =  ymd("1970-06-01"), y = .70,
			 label = "% of Americans saying presidential candidate\nwill 'Win by quite a bit' (ANES)",
			 color = cbColor[1], size = 4) +
	annotate("text", x =  ymd("1956-01-01"), y = .45,
			 label = "Electoral college\nmargin",
			 color = cbColor[2], size = 4) +
	annotate("text", x =  ymd("1954-01-01"), y = .15,
			 label = "Popular vote\nmargin",
			 color = cbColor[3], size = 4) 

ggsave(filename="figures/anes_win_ec_time_series.pdf", width=9,height=4.5)
ggsave(filename="figures/anes_win_ec_time_series.png", width=9,height=4.5)

##########################
### Cable news nubmers ###
##########################

data<-read.csv("data/archive.csv",header=T,as.is=T)

avg_mentions <- sum(data$total)/98
# Average mentions per day -- 98 days in data (8/1/2016 to 11/7/16)

# Make source labels pretty for plotting
data$source <- paste(data$source, "\n(", data$total, ")", sep="")
data$source <- as.factor(data$source)
data$source <- factor(data$source,levels(data$source)[c(2,4,5,1,3,6)])

ggplot(data, aes(source,total, fill= as.factor(slant))) + 
	geom_col()+
	scale_fill_manual(values=polcol3[c(1,3,2)])  +
	theme_pew() + 
	ggtitle("Number of mentions of win-forecasts on cable news outlets\n8/1/2016 to 11/7/16")+
	theme(legend.position="none") +
	theme(axis.title.y = element_text(size=10)) +
	theme(axis.text.y = element_text(size=1)) +
	theme(panel.border = element_blank(),
		  axis.line=element_blank(),
		  axis.ticks=element_blank(),
		  axis.text.x = element_text(size=10),
		  axis.title.x = element_blank()) +
	ylab("") + 
	annotate("text", x = 2, y = 400, label = paste(round(avg_mentions,2), "average \nmentions/day"), size = 4) 
ggsave("figures/cable_forMC.pdf", width = 6, height = 4.5)
ggsave("figures/cable_forMC.png", width = 6, height = 4.5)


anescdf16$party3 <- factor(anescdf16$party, 
						   levels = c('Strong Dem','Dem','Lean Dem','Ind','Lean Rep','Rep','Strong Rep'))
levels(anescdf16$party3) <- c("Dem", "Dem", "Dem", "Ind/Oth", "Rep", "Rep", "Rep")

anescdf16$party7 <- factor(anescdf16$party, 
						   levels = c('Strong Dem','Dem','Lean Dem','Ind','Lean Rep','Rep','Strong Rep'))
anescdf16$party7 <- as.numeric(anescdf16$party7)
anescdf16$party7 <- anescdf16$party7-4
table(anescdf16$party7)

m16 <- lm(voted ~ voted_prev + closerace*abs(party7) + closerace*party7, 
		  data = anescdf16, weights = weight,
		  subset = year=="2016")
summary(m16)

# examine year on year variation (not in MS): 
Eff <- Effect(c("closerace", "party7"), m16,
			  xlevels=list(closerace=c(0, 1), levels(anescdf16$party7)),
)
preds <- as.data.frame(Eff)


ggplot(preds, aes(x=party7, y=fit, ymin=fit-se, ymax=fit+se, 
				  col=closerace, group=closerace)) + 
	geom_line() +
	geom_ribbon(alpha=0.5) +
	xlab("7 point party ID") +
	ylab("Likelihood of voting in 2016 election") + 
	# coord_cartesian(ylim = c(.6, .8)) + 
	ggtitle("Voting in 2016")+ 
	theme_pew() +
	scale_colour_manual(values=cbColor[1:2])+
	scale_fill_manual(values=cbColor[1:2]) + 
	theme(legend.position="none")


m16 <- lm(voted ~ closerace*party + voted_prev, 
		  data = anescdf16, weights = weight,
		  subset = year=="2016" )

summary(lm(voted ~ closerace*party + voted_prev, 
		   data = anescdf16, weights = weight,
		   subset = year=="2016"))

# examine year on year variation (not in MS): 
Eff <- Effect(c("closerace", "party"), m16,
			  xlevels=list(closerace=c(0, 1), levels(anescdf16$party)),
)
preds <- as.data.frame(Eff)

ggplot(preds, aes(x=closerace, y=fit, ymin=lower, ymax=upper)) + 
	geom_point() +
	geom_errorbar(width = .01) +
	coord_flip() + 
	facet_wrap(~party, ncol=1) +
	ylab("Proportion respondents saying leading candidate\nwill win by 'quite a bit'") + 
	xlab("Party identification") +
	ggtitle("")+ 
	theme_pew() +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor)+ 
	theme(legend.position="none")

#####################################
### Simulation for Study 1 design ###
#####################################

# Actual support in electorate: 
vote_share <- seq(from= .45, to = .55, by = .01)

# How many surveys
N_survey <- 20

# Survey noise: 
survey_noise_sd = .04

# initialize simulation vars for each level of support
mae_check <- numeric(length(vote_share))
est <- numeric(length(vote_share))
est_se <- numeric(length(vote_share))

# Simulate for each level of support
for(vs in 1:length(vote_share)){
	
	# add noise to each survey
	survey_noise <- rnorm(n = N_survey, sd = survey_noise_sd)
	
	# draw respondents from 20 surveys
	n <- 1000
	p <- vote_share[vs] + survey_noise
	p[p>1] <- 1
	p[p<0] <- 0
	
	# MAE check:
	mae_check[vs] <- mean(abs(p - vote_share[vs]))
	
	# Simulate data collection
	svy <- numeric(length(p))
	for(i in 1:length(p)){
		svy[i] <- mean(rbinom(n = n, size = 1, prob = p[i]))
	}
	
	# Estimate of population support
	est[vs] <- mean(svy)
	est_se[vs] <- sqrt(var(svy)/N_survey)
	
}

var(p)

# Check MAE
mean(mae_check)

# Remove vote share = .50
est <- vote_share[-6]

# Compute standard error
est_se <- mean(est_se)

# For exact replication, we fix SE: 
est_se = 0.01002522

# Compute probabilistic estimate for each survey
est_prob <- 1-pnorm(q = .5, mean = est, sd = est_se * sqrt(N_survey) )

est
est_prob

# T distr is symmetric otherwise we'd say 
# qt(c(.025, .975), df = N_survey)

# Compute T stat
CI_mult <- qt(.975, df = N_survey) 

# Compute CI for each survey
CI <- CI_mult * est_se


svy_mean = .55
svy_SD = 0.04483415
N_svy = 20
margin_of_error = qt(.975, df = N_svy)  * svy_SD/sqrt(N_svy) 
svy_mean
margin_of_error

prob_win = 1-pnorm(q = .50, mean = svy_mean, sd = svy_SD)
prob_win

####################
### SM - Study 1 ###
####################

#load the data
dta <- (read_sav('ATP W25.sav'))

# load wave 21 for previous vote history
dta21 <- (read_sav('ATP W21.sav'))

# Read in assignment join table, join to conditions
treat_dat <- read.csv('assingments.csv')

# merge in treatment data
treat_dat$display_cond <- gsub("_text", "", treat_dat$display_cond)
dta$condition <- treat_dat$display_cond[match(dta$T_MOE_V_PWIN_W25, treat_dat$id)]
dta$est <- treat_dat$est[match(dta$T_MOE_V_PWIN_W25, treat_dat$id)]
dta$est_prob <- treat_dat$est_prob[match(dta$T_MOE_V_PWIN_W25, treat_dat$id)]

# recode certainty to 0-1
dta$certainty<-NA
dta$certainty[which(dta$CERTWIN_W25==1)]<-1
dta$certainty[which(dta$CERTWIN_W25==2)]<-.75
dta$certainty[which(dta$CERTWIN_W25==3)]<-.5
dta$certainty[which(dta$CERTWIN_W25==4)]<-.25
dta$certainty[which(dta$CERTWIN_W25==5)]<-0

# Set problematic values to NA
dta$likely_to_win <- dta$LIKELYWIN_1_W25
dta$likely_to_win[dta$likely_to_win > 100] <- NA
dta$percent_vote <- dta$PRCNTVT_1_W25
dta$percent_vote[dta$percent_vote > 100] <- NA

# Merge in data from prior wave
dta$chance_of_voting_16 = dta21$SCALE10_W21[match(dta$QKEY, dta21$QKEY)]
dta$chance_of_voting_16[dta$chance_of_voting_16==99] <- NA
dta$Republican = dta$PARTY_W25 ==1
dta$Democrat = dta$PARTY_W25 ==2

# Pooling for simplier presentation
dta$cond3 <- as.factor(dta$condition)
levels(dta$cond3) <- c("pwin", "both", "both", "share", "share", "both", "both")
levels(dta$cond3) <- c("P(win)", "Both", "Vote share")
dta$cond3 <- factor(dta$cond3, levels = c("P(win)", "Vote share", "Both"))


#######################################
### Summary statistics from Study 1 ###
#######################################

# total N
sum(table(dta$condition))

# Female
table(dta$F_SEX_FINAL==2)
prop.table(table(dta$F_SEX_FINAL==2))

# White
table(dta$F_RACETHN_RECRUITMENT==1)
prop.table(table(dta$F_RACETHN_RECRUITMENT==1))

# Black
table(dta$F_RACETHN_RECRUITMENT==2)
prop.table(table(dta$F_RACETHN_RECRUITMENT==2))

# Hispanic
table(dta$F_RACETHN_RECRUITMENT==3)
prop.table(table(dta$F_RACETHN_RECRUITMENT==3))

# Other
table(dta$F_RACETHN_RECRUITMENT==4)
prop.table(table(dta$F_RACETHN_RECRUITMENT==4))

# Age
dta$agecat <- cut(dta$F_AGE_FINAL, breaks = c(17, 34, 54, Inf))
dta$agecat[dta$F_AGE_FINAL==99] <- NA
table(dta$agecat)
prop.table(table(dta$agecat))

# Education
table(dta$F_EDUCCAT_FINAL)
prop.table(table(dta$F_EDUCCAT_FINAL))


#####################################
## Additional results from Study 1 ##
#####################################

# Table for main result estimates using OLS
dta$cond3 <- factor(dta$cond3, levels = c("Vote share","Both","P(win)"))

percent_vote <- lm(percent_vote/100 ~
				   	est * cond3, 
				   data = dta)
likely_to_win <- lm(likely_to_win/100 ~
						est * cond3, 
					data = dta)
certainty <- lm(certainty ~
					cond3, 
				data = dta)
certaintygt50 <- lm(certainty ~
						cond3, 
					data = dta,
					est > .50)

# Comment out type = "text" for latex output:
stargazer(percent_vote, likely_to_win, certainty, certaintygt50,
		  align=TRUE, omit.stat=c("LL","ser","f"), no.space=TRUE,
		  type = "text",
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("Estimate", 
		  					 "Condition: Both",
		  					 "Condition: Probability",
		  					 "Est $\\times$ Cond: Both",
		  					 "Est $\\times$ Cond: Probability",
		  					 "Intercept"),
		  column.labels=c("Percent of vote", "Likely to win","Certainty", 
		  				"Certainty (Share \\textgreater 50\\%)")
)



############################################################################
## Analysis of study 1 with probability-vote share confused cases removed ##
############################################################################

# Conflated vote share and prob? 
threshold = .01
dta$conflated_p_vs <- abs(dta$percent_vote/100 - dta$est_prob) < threshold
se = function(x) sd(x, na.rm = T)/sqrt(length(na.omit(x)))
dta %>% group_by(cond3) %>% 
	summarise(m = mean(conflated_p_vs, na.rm=T), se = se(conflated_p_vs))

# REMOVE THESE CASES, REANALYZE
percent_vote <- lm(percent_vote/100 ~
				   	est * cond3, 
				   data = dta, subset = conflated_p_vs==FALSE)
likely_to_win <- lm(likely_to_win/100 ~
						est * cond3, 
					data = dta, subset = conflated_p_vs==FALSE)
certainty <- lm(certainty ~
					cond3, 
				data = dta, subset = conflated_p_vs==FALSE)
certaintygt50 <- lm(certainty ~
						cond3, 
					data = dta, subset = conflated_p_vs==FALSE & est > .50)

# Comment out type = "text" for latex output:
stargazer(percent_vote, likely_to_win, certainty, certaintygt50,
		  align=TRUE, omit.stat=c("LL","ser","f"), no.space=TRUE,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  type = "text",
		  covariate.labels = c("Estimate", 
		  					 "Condition: Both",
		  					 "Condition: Probability",
		  					 "Est $\\times$ Cond: Both",
		  					 "Est $\\times$ Cond: Probability",
		  					 "Intercept"),
		  column.labels=c("Percent of vote", "Likely to win","Certainty", 
		  				"Certainty (Share \\textgreater 50\\%)")
)

####################################
### Analysis of order in Study 1 ###
####################################

dta$condition <- as.character(dta$condition)
dta$condition <- factor(dta$condition)
dta$cond <- dta$condition
levels(dta$cond) <- gsub(pattern = "_moe", 
						 replacement = "", 
						 x = levels(dta$cond))

dta$order <- "Only one display"
dta$order[dta$cond=="pwin_share"] <- "P(win) first"
dta$order[dta$cond=="share_pwin"] <- "Vote share first"

dta$cond <- relevel(dta$cond, 2) 
percent_vote <- lm(percent_vote/100 ~
				   	est * order, 
				   data = dta, 
				   subset = cond %in% c("pwin_share", "share_pwin"))

likely_to_win <- lm(likely_to_win/100 ~
						est * order, 
					data = dta,
					subset = cond %in% c("pwin_share", "share_pwin"))

certainty <- lm(certainty ~
					order, 
				data = dta,
				subset = cond %in% c("pwin_share", "share_pwin"))
certaintygt50 <- lm(certainty ~
						order, 
					data = dta,
					subset = cond %in% c("pwin_share", "share_pwin") &
						est > .50)

# Comment out type = "text" for latex output:
stargazer(percent_vote, likely_to_win, certainty, certaintygt50,
		  align=TRUE, omit.stat=c("LL","ser","f"), no.space=TRUE,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  type = "text",
		  covariate.labels = c("Estimate",
		  					 "Vote share first",
		  					 "Est $\\times$ share first",
		  					 "Intercept"),
		  column.labels=c("Percent of vote", "Likely to win","Certainty", 
		  				"Certainty (\\textgreater 50\\%)")
)

#############################################
### Inclusion of Margin of Error, Study 1 ###
#############################################

dta$cond_moe <- factor(dta$condition)
levels(dta$cond_moe) <- c(0, 0, 1, 0, 1, 1, 0)

percent_vote <- lm(percent_vote/100 ~
				   	est * cond3 * cond_moe, 
				   data = dta, 
				   subset = cond3 != "P(win)")
likely_to_win <- lm(likely_to_win/100 ~
						est * cond3 * cond_moe, 
					data = dta,
					subset = cond3 != "P(win)")
certainty <- lm(certainty ~
					cond3 * cond_moe, 
				data = dta,
				subset = cond3 != "P(win)")
certaintygt50 <- lm(certainty ~
						cond3 * cond_moe, 
					data = dta,
					subset = est > .5 & cond3 != "P(win)")

# Comment out type = "text" for latex output:
stargazer(percent_vote, likely_to_win, certainty, certaintygt50,
		  align=TRUE, omit.stat=c("LL","ser","f"), no.space=TRUE,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  type = "text",
		  covariate.labels = c("Estimate",
		  					 "Condition: Both",
		  					 "Condition: MOE",
		  					 "Est $\\times$ Both",
		  					 "Est $\\times$ MOE",
		  					 "Both $\\times$ MOE",
		  					 "Est $\\times$ Both $\\times$ MOE",
		  					 "Intercept"),
		  column.labels=c("Percent of vote", "Likely to win","Certainty", 
		  				"Certainty (\\textgreater 50\\%)")
)


###################################################
### Self-reported vote intent analysis, Study 1 ###
###################################################

dta$hype_vote <- dta$VOTEHYPO_W25
dta$hype_vote[dta$hype_vote==99] <- NA
dta$hype_vote <- 5 - as.numeric(dta$hype_vote)
table(dta$VOTEHYPO_W25)
prop.table(table(dta$hype_vote))
dta$Republican <- as.numeric(dta$Republican)
dta$Democrat <- as.numeric(dta$Democrat)

dta$report_2016_voted <- as.numeric(dta$chance_of_voting_16)

hv <- lm(hype_vote ~ est*cond3,
		 data=dta)
hv_sd <- lm(hype_vote ~ est*cond3 + report_2016_voted,
			data=dta)
hv_sd_pid <-lm(hype_vote ~ est * cond3 + 
			   	report_2016_voted + 
			   	Republican + Democrat,
			   data=dta)
summary(hv_sd_pid)


# Comment out type = "text" for latex output:
stargazer(hv, hv_sd, hv_sd_pid,
		  align=TRUE, omit.stat=c("LL","ser","f"), no.space=TRUE,
		  type = "text",
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("Estimate", 
		  					 "Condition: Both",
		  					 "Condition: Probability",
		  					 "Reported voting in 2016",
		  					 "Republican",
		  					 "Democrat",
		  					 "Est $\\times$ Cond: Both",
		  					 "Est $\\times$ Cond: Probability",
		  					 "Intercept"),
		  column.labels=c("Vote intent self report")
)





#######################
# Study 1 robustness #
#######################

dta$both_50 <-(dta$likely_to_win==50 & dta$percent_vote==50)
length(which(dta$both_50))/length(dta$both_50)

threshold = .011
dta$dist_p_vs <- abs(dta$percent_vote/100 - dta$likely_to_win/100)
dta$dist_p_vs <- abs(dta$percent_vote/100 - dta$likely_to_win/100)
dta$dist_p_vs <- abs(dta$percent_vote/100 - dta$likely_to_win/100)
dta$conflated_p_vs <- dta$dist_p_vs < threshold
dta$same_p_vs <- dta$percent_vote/100 == dta$likely_to_win/100
se = function(x) sd(x, na.rm = T)/sqrt(length(na.omit(x)))

dta$cond3 <- factor(dta$cond3, levels = rev(c("P(win)", "Both", "Vote share")))

dta %>% filter(!both_50) %>% group_by(cond3) %>% 
	summarise(m = mean(conflated_p_vs, na.rm=T), se = se(conflated_p_vs))

dta %>% group_by(cond3) %>% 
	summarise(m = mean(both_50, na.rm=T), se = se(both_50), N = length(both_50))

# How many put down the same answer? 
mean(dta$same_p_vs, na.rm=T)
mean(dta$both_50, na.rm=T)
plotdat <- dta %>% group_by(cond3) %>% 
	summarise(m = mean(same_p_vs, na.rm=T), se = se(same_p_vs), N = length(same_p_vs))
plotdat

ggplot(plotdat, 
	   aes(x=cond3, y=m, ymin=m-1.96*se, ymax=m+1.96*se, 
	   	group=cond3, color = cond3)) + 
	geom_pointrange() + coord_flip() +
	theme_pew()  + theme(legend.position="none") +
	scale_colour_manual(values=cbColor[c(2,3,1)]) +
	scale_fill_manual(values=cbColor[c(2,3,1)]) +
	xlab("Condition") + 
	ylab("Proportion reporting same number for\nvote share and likelihood of winning, 95% CI") 
ggsave(filename="same_vote_likelihood.pdf", width=6.5,height=2.5)



t.report <- function(tt){
	tvalue <- tt$statistic %>% formatC(digits = 2, format = "f")
	pvalue <- tt$p.value %>% formatC(digits = 6, format = "f")
	df <- tt$parameter %>% formatC(digits = 1, format = "f")
	M1 <- tt$estimate[1] %>% formatC(digits = 2, format = "f")
	M1l <- gsub("mean in group ", "", names(M1))
	M2 <- tt$estimate[2] %>% formatC(digits = 2, format = "f")
	M2l <- gsub("mean in group ", "", names(M2))
	# Latex output: 
	cat(paste0("$M_{", M1l, "} = ", M1, ", M_{", M2l, "} = ", M2,
			   ", T(",df,") = ",tvalue, ", P = ", pvalue, "$"))
	cat("\n")
	# plain text:
	cat(paste0("M_", M1l, " = ", M1, ", M_", M2l, " = ", M2,
			   ", T(",df,") = ",tvalue, ", P = ", pvalue, ", two-sided"))
}
dta$both_v_rest <- dta$cond3=="Both"
t.report(t.test(same_p_vs ~ both_v_rest, data=dta))




# Now look into error respondents made on average:
plotdat <- dta %>% filter(!both_50) %>% group_by(cond3) %>% 
	summarise(m = mean(abs(likely_to_win/100 - est), na.rm=T), 
			  se = se(abs(likely_to_win/100 - est)), 
			  N = length(abs(likely_to_win/100 - est)))
plotdat$var <- "Distance between reported likelihood and provided vote share"
plotdat2 <- dta %>% filter(!both_50) %>% group_by(cond3) %>% 
	summarise(m = mean(abs(likely_to_win/100 - est_prob), na.rm=T), 
			  se = se(abs(likely_to_win/100 - est_prob)), 
			  N = length(abs(likely_to_win/100 - est_prob)))
plotdat2$var <- "Distance between reported likelihood and provided win likelihood"

plotdat <- rbind(plotdat, plotdat2)

ggplot(plotdat, 
	   aes(x=cond3, y=m, ymin=m-1.96*se, ymax=m+1.96*se, 
	   	group=cond3, color = cond3)) + 
	geom_pointrange() + coord_flip() +
	theme_pew()  + theme(legend.position="none") +
	facet_wrap(~var, ncol=1) +
	scale_colour_manual(values=cbColor[c(2,3,1)]) +
	scale_fill_manual(values=cbColor[c(2,3,1)]) +
	xlab("Condition") + 
	ylab("Average error, 95% CI") 
ggsave(filename="Errors_vs_prob.pdf", width=6.5,height=2.5)


plotdat <- dta %>% group_by(cond3) %>% 
	summarise(m = mean(both_50, na.rm=T), se = se(both_50))

ggplot(plotdat, aes(x=cond3, y=m, ymin=m-1.96*se, ymax=m+1.96*se)) + 
	geom_pointrange() + coord_flip() +
	theme_pew()  + theme(legend.position="none") 




####################################
### A: R judgement re VOTE SHARE ###
####################################

xbreaks = c(.45, .475, .50, .525, .55)



ggplot(subset(dta, both_50==F), aes(est,percent_vote/100, group=cond3, color = cond3)) + 
	geom_smooth( method = "loess",
				 span = 1.5,
				 aes(fill=cond3),
				 alpha=0.6) +
	theme_pew()  + theme(legend.position="none") +
	scale_colour_manual(values=cbColor) +
	scale_fill_manual(values=cbColor) +
	coord_cartesian(ylim= c(.4, .69)) +
	scale_y_continuous(labels=scales::percent) +
	scale_x_continuous(labels=xlabs, breaks=xbreaks) +
	annotate("text", x = .525, y = .685, 
			 label = "Estimate presented as", size = 6) + 
	annotate("text", x = .47, y = .58, label = "Vote share", 
			 color = cbColor[2], size = 6) + 
	ylab("Reported percent of vote expected for candidate A") +
	annotate("text", x = .530, y = .661, label = "P(win)", 
			 color = cbColor[1], size = 6) + 
	annotate("text", x = .535, y = .61, label = "Both", 
			 color = cbColor[3], size = 6) + 
	annotate("text", x = .52, y = .49, label = "Maximum Accuracy", 
			 color = "#707070", size = 6) + 
	xlab("Election information presented to participant,\nvote share (probability)") + 
	geom_vline(xintercept=c(.50),linetype="dotted",color="#707070") +
	geom_segment(aes(x=.45, y=.45, xend=.55, yend=.55),
				 linetype="dashed",color="#707070") +
	ggtitle("Reported vote share expected (LOESS)") 

ggsave(filename="votea_no_50_50.pdf", width=6.5,height=5)

m<-lm(percent_vote/100~est*cond3,data=subset(dta, both_50==F))
summary(m)
eff<-effect(m,term="est*cond3",se=T,default.levels=100)

dataeff<-as.data.frame(eff)
dataeff[dataeff$est ==.55,]
dataeff[dataeff$est ==.45,]


diff(dataeff[dataeff$est==.55,]$fit)
diff(dataeff[dataeff$est==.45,]$fit)

dataeff <- dataeff[dataeff$est==.55,]
dataeff$cond3 <- factor(dataeff$cond3,levels(dataeff$cond3)[c(3,1,2)])
dataeff <- dataeff[c(3,1,2),]

qplot(cond3,fit,data=dataeff,color=c(cbColor[1],cbColor[3],cbColor[2])) +
	geom_errorbar(aes(ymin=lower, ymax=upper),width=.25) +
	theme_pew()  +
	scale_y_continuous(labels=scales::percent, 
					   breaks = c(.50, .60)) +
	
	ggtitle("Vote share (OLS)") +
	xlab("Estimate at 55%\n(0.87)\n") + 
	ylab("") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	
	# First arrow
	annotate("segment",x=1.5,xend=1.5,y=dataeff$fit[3],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1,y=dataeff$fit[3],yend=dataeff$fit[3],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1.95,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.65, y = .62, 
			 label = paste(round(((dataeff$fit[1]/dataeff$fit[3])-1)*100, digits=2), "%\nlarger\nshare",sep=""),
			 hjust = 0) +
	
	# Second arrow 
	annotate("segment",x=2.5,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2.95,y=dataeff$fit[2],yend=dataeff$fit[2],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 2.65, y = .61, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[1])-1)*100, digits=2)), "%\nlarger\nshare",sep=""),
			 hjust = 0) +
	
	# Third arrow
	annotate("segment", x=1.25,xend=1.25,y=dataeff$fit[3],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("segment", x=1.25,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("text", size=3, x = 1.20, y = .61, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[3])-1)*100, digits=2)), 
			 			  "%\nlarger\nshare",sep=""),
			 hjust = 1) +
	
	# Max Accuracy
	# annotate("segment",x=.5,xend=2.95,y=.55,yend=.55,size=0.3,
	#          arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.25, y = .54, 
			 label = "Maximum Accuracy",
			 hjust = 0) +
	geom_hline(yintercept=c(.55),linetype="dotted",color="#707070") +
	theme(legend.position="none") 
ggsave(filename="voteb.pdf", width=3,height=5.22)

################################
### B: Likelihood of winning ###
################################

ggplot(subset(dta, both_50==F), aes(est,likely_to_win/100, group=cond3, color = cond3)) + 
	geom_smooth( method = "loess",
				 span = 1.5,
				 aes(fill=cond3),
				 alpha=0.6) +
	theme_pew()  +
	# scale_y_continuous(labels=scales::percent) +
	scale_y_continuous( labels=scales::percent, 
						breaks = c(.25, .5, .75)) +
	coord_cartesian(ylim = c(.10,.90)) + 
	scale_x_continuous(labels=xlabs, breaks=xbreaks) +
	annotate("text", x = .495, y = .85, 
			 label = "Estimate presented as", size = 6) + 
	annotate("text", x = .475, y = .62, label = "Vote share", 
			 color = cbColor[2], size = 6) + 
	ylab("Reported likelihood thatcandidate A wins") +
	annotate("text", x = .52, y = .71, label = "P(win)", 
			 color = cbColor[1], size = 6) + 
	annotate("text", x = .502, y = .63, label = "Both", 
			 color = cbColor[3], size = 6) + 
	annotate("text", x = .4835, y = .22, label = "Maximum Accuracy", 
			 color = "#707070", size = 6) + 
	xlab("Election information presented to participant,\nvote share (probability)")+ 
	theme(legend.position="none")+
	geom_vline(xintercept=c(.50),linetype="dotted",color="#707070") +
	geom_segment(aes(x=.45, y=.13, xend=.55, yend=.87),
				 linetype="dashed",color="#707070") +
	ggtitle("Reported likelihood that Candidate A wins (LOESS)") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor)
ggsave(filename="likelihood_loess_no_50_50.pdf", width=6.5,height=5)

m<-lm(likely_to_win/100~est*cond3,data=subset(dta, both_50==F))
summary(m)
eff<-effect(m,term="est*cond3",se=T,default.levels=100)

dataeff<-as.data.frame(eff)
dataeff[dataeff$est ==.55,]
dataeff[dataeff$est ==.45,]


diff(dataeff[dataeff$est==.55,]$fit)
diff(dataeff[dataeff$est==.45,]$fit)

dataeff <- dataeff[dataeff$est==.55,]
dataeff$cond3 <- factor(dataeff$cond3,levels(dataeff$cond3)[c(3,1,2)])
dataeff <- dataeff[c(3,1,2),]

qplot(cond3,fit,data=dataeff,color=c(cbColor[1],cbColor[3],cbColor[2])) +
	geom_errorbar(aes(ymin=lower, ymax=upper),width=.25) +
	theme_pew()  +
	scale_y_continuous( #labels=scales::percent, 
		breaks = c(.25, .5, .75), limits = c(.10,.90)) +
	ggtitle("Likelihood of winning (OLS)") +
	xlab("Estimate at 55%\n(0.87)\n") + 
	ylab("") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	
	# First arrow
	annotate("segment", x=1.25,xend=1.25,y=dataeff$fit[3],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("segment", x=1.25,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("text", size=3, x = 1.20, y = .68, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[3])-1)*100, digits=2)), 
			 			  "%\nmore",sep=""),
			 hjust = 1) +
	
	# Second arrow 
	annotate("segment",x=1.5,xend=1.5,y=dataeff$fit[3],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1,y=dataeff$fit[3],yend=dataeff$fit[3],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1.95,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.65, y = .58, 
			 label = paste(round(((dataeff$fit[1]/dataeff$fit[3])-1)*100, digits=2), "%\nmore",sep=""),
			 hjust = 0) +
	
	# Third arrow
	annotate("segment",x=2.5,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2.95,y=dataeff$fit[2],yend=dataeff$fit[2],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 2.65, y = .62, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[1])-1)*100, digits=2)), "%\nmore",sep=""),
			 hjust = 0) +
	# Max Accuracy
	annotate("text", size=3, x = 1.25, y = .86, 
			 label = "Maximum Accuracy",
			 hjust = 0) +
	geom_hline(yintercept=c(.87),linetype="dotted",color="#707070") +
	theme(legend.position="none") 
ggsave(filename="likelihood_win55_no_50_50.pdf", width=3,height=5.22)

####################
### C: Certainty ###
####################

ggplot(subset(dta, both_50==F), aes(est,certainty, group=cond3, color = cond3)) + 
	geom_smooth(method = "loess", aes(fill=cond3), span = 1.5, alpha=0.6) +
	theme_pew()  +
	coord_cartesian(ylim = c(.52,.75)) + 
	scale_y_continuous(labels=scales::percent) + 
	scale_x_continuous(labels=xlabs, breaks=xbreaks) +
	annotate("text", x = .505, y = .74, 
			 label = "Estimate presented as", size = 6) + 
	annotate("text", x = .54, y = .54, label = "Vote share", 
			 color = cbColor[2], size = 6) + 
	ylab("Certainty reported that candidate A will win/lose") +
	annotate("text", x = .49, y = .651, label = "P(win)", 
			 color = cbColor[1], size = 6) + 
	annotate("text", x = .52, y = .60, label = "Both", 
			 color = cbColor[3], size = 6) + 
	xlab("Election information presented to participant,\nvote share (probability)")+ 
	theme(legend.position="none")+
	geom_vline(xintercept=c(.50),linetype="dotted",color="#707070") +
	ggtitle("Certainty (LOESS)")+
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) 

ggsave(filename="certaintyc_no_50_50.pdf", width=6.5,height=5)


m<-lm(certainty~est*cond3,data=subset(dta, both_50==F))
summary(m)
eff<-effect(m,term="est*cond3",se=T,default.levels=100)

dataeff<-as.data.frame(eff)
dataeff <- dataeff[dataeff$est==.55,]

dataeff$cond3 <- factor(dataeff$cond3,levels(dataeff$cond3)[c(3,1,2)])
dataeff <- dataeff[c(3,1,2),]

qplot(cond3,fit,data=dataeff,color=c(cbColor[1],cbColor[3],cbColor[2]),height=200) +
	geom_errorbar(aes(ymin=lower, ymax=upper),width=.25) +
	theme_pew()  +
	coord_cartesian(ylim = c(.52,.75)) + 
	scale_y_continuous(labels=scales::percent) + 
	ggtitle("Certainty (OLS)") +
	xlab("Estimate at 55%\n(0.87)\n") + 
	ylab("") +
	scale_colour_manual(values=cbColor)+
	scale_fill_manual(values=cbColor) +
	# First arrow
	annotate("segment",x=1.5,xend=1.5,y=dataeff$fit[3],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1,y=dataeff$fit[3],yend=dataeff$fit[3],size=0.3,color="#707070") +
	annotate("segment",x=1.5,xend=1.95,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 1.65, y = .59, 
			 label = paste(round(((dataeff$fit[1]/dataeff$fit[3])-1)*100, digits=2), "%\nmore\ncertain",sep=""),
			 hjust = 0) +
	
	# Second arrow 
	annotate("segment",x=2.5,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2,y=dataeff$fit[1],yend=dataeff$fit[1],size=0.3,color="#707070") +
	annotate("segment",x=2.5,xend=2.95,y=dataeff$fit[2],yend=dataeff$fit[2],size=0.3,
			 arrow=arrow(length=unit(0.2,"cm"),type="closed"),color="#707070") +
	annotate("text", size=3, x = 2.65, y = .63, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[1])-1)*100, digits=2)), "%\nmore\ncertain",sep=""),
			 hjust = 0) +
	
	# Third arrow
	annotate("segment", x=1.25,xend=1.25,y=dataeff$fit[3],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("segment", x=1.25,xend=2.5,y=dataeff$fit[2],yend=dataeff$fit[2],
			 size=0.3, linetype="dashed", color="#707070") +
	annotate("text", size=3, x = 1.20, y = .65, 
			 label = paste(abs(round(((dataeff$fit[2]/dataeff$fit[3])-1)*100, digits=2)), 
			 			  "%\nmore\ncertain",sep=""),
			 hjust = 1) +
	theme(legend.position="none") 

ggsave(filename="certaintyd_no_50_50.pdf", width=3,height=5.22)















##################################################
### Analysis of which candidate is in the lead ###
### Replication of Study 1                     ###
##################################################

aggregatorExtensions <- read_csv("data/aggregatorExtensions.csv")

whichUp <- aggregatorExtensions[aggregatorExtensions$study2==3 ,]

whichUp <- whichUp[whichUp$corrected_survey==1,]
whichUp$certainty<-NA
whichUp$certainty[which(whichUp$Q63=="1")]<-1
whichUp$certainty[which(whichUp$Q63=="2")]<-.75
whichUp$certainty[which(whichUp$Q63=="3")]<-.5
whichUp$certainty[which(whichUp$Q63=="4")]<-.25
whichUp$certainty[which(whichUp$Q63=="5")]<-0

c<-lm(certainty~as.factor(stimuli.cand)*as.factor(stimuli.est), data=whichUp[whichUp$stimuli.est>.48,])
table(whichUp$stimuli.cand)
v<-lm(Q67~as.factor(stimuli.cand)*as.factor(stimuli.est), data=whichUp[whichUp$stimuli.est>.48,])
p<-lm(Q65~as.factor(stimuli.cand)*as.factor(stimuli.est), data=whichUp[whichUp$stimuli.est>.48,])

# Comment out type = "text" for latex output:
stargazer(c,v,p,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  type = "text", 
		  covariate.labels = c("Candidate B Ahead", 
		  					 "Winning Probability",
		  					 "Candidate B Ahead x Winning Probability",
		  					 "Intercept Average"),
		  column.labels=c("Certainty", 
		  				"Vote Share",
		  				"Probability of Victory")
)


###############
###############
### Study 2 ###
###############
###############


load("data/longform.RData")
longdata$abs_prob_dist <- abs(longdata$probability - .5)
longdata$abs_margin_dist <- abs(longdata$margin/100 - .5)

# Main results
prob_margin_abs <- lmer(voted~abs_prob_dist + abs_margin_dist +
							trial + (1|V1), longdata)
stargazer(prob_margin_abs,
		  type = "text", 
		  star.cutoffs = c(0.05, 0.01, 0.001))

# Are people figureing it out after a few trials? 
longdata$Trial <- as.numeric(as.character(longdata$trial))
prob_margin_abs <- lmer(voted~abs_prob_dist*Trial + abs_margin_dist*Trial +
							(1|V1), longdata)
stargazer(prob_margin_abs,
		  type = "text", 
		  star.cutoffs = c(0.05, 0.01, 0.001))
# No


# Does directionality matter? 
prob_margin_abs_directional <- lmer(voted~abs_prob_dist + abs_margin_dist +
										probability + 
										margin + 
										as.factor(trial) + (1|V1), longdata)
prob_margin_directional <- lmer(voted~probability + 
									margin + 
									as.factor(trial) + (1|V1), longdata)

stargazer(prob_margin_directional, prob_margin_abs_directional,
		  type = "text",
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability)", 
		  					 "abs(Midpoint Distance, Vote Share)",
		  					 "Probability",
		  					 "Vote Share", "Trial 2", "Trial 3","Trial 4", "Trial 5", 
		  					 "Intercept Average"))
# No

# Look at standardized effects: 
# SD shift effects: 
longdata$abs_prob_dist_norm <- longdata$abs_prob_dist/sd(longdata$abs_prob_dist, na.rm = TRUE)
longdata$abs_prob_dist_norm <- longdata$abs_prob_dist_norm - mean(longdata$abs_prob_dist_norm, na.rm=TRUE)
longdata$abs_margin_dist_norm <- longdata$abs_margin_dist/sd(longdata$abs_margin_dist, na.rm = TRUE)
longdata$abs_margin_dist_norm <- longdata$abs_margin_dist_norm - mean(longdata$abs_margin_dist_norm, na.rm = TRUE)

mean(longdata$abs_prob_dist_norm, na.rm=TRUE)
mean(longdata$abs_margin_dist_norm, na.rm=TRUE)
sd(longdata$abs_prob_dist_norm, na.rm=TRUE)
sd(longdata$abs_margin_dist_norm, na.rm=TRUE)

prob_margin_abs_norm <- lmer(voted~abs_prob_dist_norm + abs_margin_dist_norm +
							 	as.factor(trial) + (1|V1), longdata)
stargazer(prob_margin_abs_norm,
		  type = "text",
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability), Normalized", 
		  					 "abs(Midpoint Distance, Vote Share), Normalized", 
		  					 "Trial 2", "Trial 3","Trial 4", "Trial 5", 
		  					 "Intercept Average"))

##############################
### Game N Study and order ###
##############################

AggregatorGameNQualtrics <- read_csv("data/AggregatorGameNQualtrics.csv")
table(AggregatorGameNQualtrics$gid)

### Reshape to long form
longdata <- melt.data.table(data.table(AggregatorGameNQualtrics,key = "gid"),
							measure.vars = list(
								c("round1.probability","round2.probability","round3.probability","round4.probability","round5.probability"),
								c("round1.margin","round2.margin","round3.margin","round4.margin","round5.margin"),
								c("round1.partTeam","round2.partTeam","round3.partTeam","round4.partTeam","round5.partTeam"),
								c("round1.awardAmount","round2.awardAmount","round3.awardAmount","round4.awardAmount","round5.awardAmount"),
								c("Q43","Q53","Q63","Q73","Q83"),
								c("roundOutcome1.wallet","roundOutcome2.wallet","roundOutcome3.wallet","roundOutcome4.wallet","roundOutcome5.wallet"),
								c("round1.n","round2.n","round3.n","round4.n","round5.n")),
							value.name  = c("probability","margin","partTeam","awardAmount","vote","wallet","n"),
							variable.name = "trial",
							id.vars = c("order","gid"))
table(longdata$vote)

# 0,1 Turnout Variable
longdata<- na.omit(longdata)
longdata$voted <-   car::recode(longdata$vote,"2=0")
longdata$probability <- as.numeric(longdata$probability)
longdata$margin <- as.numeric(longdata$margin)

longdata$abs_prob_dist <- abs(longdata$probability - .5)
longdata$abs_margin_dist <- abs(longdata$margin/100 - .5)
hist(longdata$abs_prob_dist)
longdata$trial <- as.factor(longdata$trial)
prob_margin_abs <- lme4::lmer(voted~abs_prob_dist*n + abs_margin_dist*n + trial + order + (1|gid), longdata)
summary(prob_margin_abs)
stargazer(prob_margin_abs,
		  star.cutoffs = c(0.05, 0.01, 0.001),
		  covariate.labels = c("abs(Midpoint Distance, Probability)", "Group Size (N)","abs(Midpoint Distance, Vote Share)", "Trial 2", "Trial 3","Trial 4", "Trial 5", "Order", "abs(Midpoint Distance, Probability) X Group Size (N)","abs(Midpoint Distance, Vote Share) X Group Size (N)","Intercept Average"))
<<<<<<< HEAD

##############################
### Game N (Small) Study and order ###
##############################

AggregatorGameNQualtrics <- read_csv("data/AggregatorGameNSmall.csv")
table(AggregatorGameNQualtrics$ResponseId)

### Reshape to long form
longdata <- melt.data.table(data.table(AggregatorGameNQualtrics,key = "ResponseId"),
							measure.vars = list(
								c("round1.probability","round2.probability","round3.probability","round4.probability","round5.probability"),
								c("round1.margin","round2.margin","round3.margin","round4.margin","round5.margin"),
								c("round1.partTeam","round2.partTeam","round3.partTeam","round4.partTeam","round5.partTeam"),
								c("round1.awardAmount","round2.awardAmount","round3.awardAmount","round4.awardAmount","round5.awardAmount"),
								c("Q43","Q53","Q63","Q73","Q83"),
								c("roundOutcome1.wallet","roundOutcome2.wallet","roundOutcome3.wallet","roundOutcome4.wallet","roundOutcome5.wallet"),
								c("round1.n","round2.n","round3.n","round4.n","round5.n")),
							value.name  = c("probability","margin","partTeam","awardAmount","vote","wallet","n"),
							variable.name = "trial",
							id.vars = c("order","ResponseId"))
table(longdata$vote)
head(longdata)
# 0,1 Turnout Variable
longdata<- na.omit(longdata)
longdata$voted[longdata$vote == "Vote for your team ($1 cost)"] <- 1
longdata$voted[longdata$vote != "Vote for your team ($1 cost)"] <- 0

longdata$probability <- as.numeric(longdata$probability)
longdata$margin <- as.numeric(longdata$margin)

longdata$abs_prob_dist <- abs(longdata$probability - .5)
longdata$abs_margin_dist <- abs(longdata$margin/100 - .5)
hist(longdata$abs_prob_dist)
longdata$trial <- as.factor(longdata$trial)
longdata$ResponseId <- as.factor(longdata$ResponseId)
longdata$n <- as.factor(longdata$n)
prob_margin_abs <- lme4::lmer(voted~abs_prob_dist*n + abs_margin_dist*n + trial + order + (1|ResponseId), longdata, REML = FALSE)
summary(prob_margin_abs)

stargazer(prob_margin_abs,
		  star.cutoffs = c(0.05, 0.01, 0.001),report = ('vc*p'))

prob_margin_abs <- lme4::lmer(voted~abs_prob_dist+n + abs_margin_dist + trial + order + (1|ResponseId), longdata)
summary(prob_margin_abs)
=======
	>>>>>>> 8e040892daf5f483dfac2c0570458751afd540c4
